#############################################################################################
#######Replication Code for Tables 1, 2, A2, Figures 2, A2###################################
######of "Ethnicity and Strategic Repression of Protest during the 2011 Syrian Uprising"#####
####by Konstantin Ash and Kevin Mazur, forthcoming in Perspectives on Politics###############
#############################################################################################

library(tidyverse)
library(sf)
library(lubridate)
library(xtable)

# Replication folder. Edit `repl_dir` to point at your local copy of the
# replication materials, or launch R from inside that folder.
repl_dir <- "/Users/kevinmazur/Desktop/KM_academic_work/strat_repr_w_konstantin/pop_final_submission_13jul25/ash_mazur_strategic_repression_pop_supplementary_16jul25"
if (dir.exists(repl_dir)) {
  setwd(repl_dir)
} else {
  # Do not abort on machines where the author's path does not exist; all file
  # names below are relative, so they resolve against the current directory.
  message(
    "Replication folder not found at '", repl_dir,
    "'; reading inputs from the current working directory: ", getwd()
  )
}

# Town-level covariates (one row per locality); used throughout the script
dd <- read_csv("l4_covars_pop_jul25.csv")

##################
##################
#####TABLE 1######
##################
##################
# Number of towns per majority-identity category and each category's
# (unweighted) percentage of all towns.
cc <- dd %>%
  count(emaj5cat, name = "town_count") %>%
  mutate(town_pct = round(100 * town_count / sum(town_count), 0))

# Total population per category and each category's percentage of the summed
# population. NOTE(review): sum() has no na.rm, so a missing tot_pop in any
# town propagates NA to the totals -- confirm tot_pop is complete.
pp <- dd %>%
  group_by(emaj5cat) %>%
  summarize(tot_pop = sum(tot_pop)) %>%
  mutate(pct_pop_wtd = round(100 * tot_pop / sum(tot_pop), 0))

# Explicit print(): a bare top-level expression is not displayed when the
# script is run through source().
print(cc %>% left_join(pp, by = "emaj5cat"))


##################
##################
####FIGURE 2#####
##################
##################

#yy <- read_sf("/Users/kevinmazur/Desktop/KM_academic_work/strat_repr_w_konstantin/syr_admin_shp_utf8_170601/syr_pplp_adm4.shp")
# Slimmed-down town table keyed by PCODE. `e5mix` is "mixed" where
# emixl4 == 1 and the majority category otherwise.
dcut <- dd %>%
  mutate(e5mix = ifelse(emixl4 == 1, "mixed", emaj5cat)) %>%
  select(PCODE = l4_pcode, l4_en, tot_pop, e5mix, emaj5cat, psz)

# Event database (one row per coded event)
ee <- read_csv("eventdb_discretecoding_l4.csv")

# Killings records: keep those dated before 12 Oct 2011, turn lon/lat (WGS84)
# into point geometries, then project to EPSG:3857.
ss <- read_csv("syria_tracker_killings_data.csv") %>%
  filter(date < as.Date("2011-10-12")) %>%
  st_as_sf(coords = c("lon", "lat"), crs = 4326) %>%
  st_transform(3857)

# Built-up-area polygons, projected to the same CRS as the killings points
bua <- read_sf("syr_ppla_bua.shp")
bt <- st_transform(bua, st_crs(ss))

# Buffer each built-up area by 1000 map units.
# NOTE(review): EPSG:3857 units are nominally metres but are stretched away
# from the equator, so this is presumably somewhat less than 1 km on the
# ground at Syria's latitude -- confirm this is acceptable.
bt_1kbuff <- st_buffer(bt, 1000)

# For every killing record, the indices of all buffered built-up areas that
# contain it (zero, one or several per record).
mm <- st_intersects(ss, bt_1kbuff)

# k-th matching buffer index per record, NA where a record has fewer than k
# matches. vapply() guarantees an integer vector even for empty input.
nth_match <- function(k) vapply(mm, function(hits) hits[k], integer(1))

intersect_1 <- nth_match(1)
intersect_2 <- nth_match(2)
intersect_3 <- nth_match(3)
intersect_4 <- nth_match(4)

#double counts at 1k: 143 2x, 21 3x, 10 4x, 0 5x
# (records falling inside at least 2, 3, 4 and 5 buffers respectively)
sum(lengths(mm) >= 2)
sum(lengths(mm) >= 3)
sum(lengths(mm) >= 4)
sum(lengths(mm) >= 5)

# Expand killings records to one row per (record, matched locality).
# A record falling inside several buffers is counted once for each of them.
# Built in one vectorised step per match position instead of growing a data
# frame row by row, which also avoids the 2:nrow() pitfall on short inputs.
locale_rows <- function(idx) {
  data.frame(date = ss$date, PCODE = bt_1kbuff$PCODE[idx])
}

#first locale
# Kept in full: records with no matching buffer stay in with PCODE = NA.
totalup <- locale_rows(intersect_1)

#second locale
loc2 <- locale_rows(intersect_2) %>% filter(!is.na(PCODE))
tot12 <- bind_rows(totalup, loc2)

#third locale
loc3 <- locale_rows(intersect_3) %>% filter(!is.na(PCODE))
tot123 <- bind_rows(tot12, loc3)

#fourth locale
loc4 <- locale_rows(intersect_4) %>% filter(!is.na(PCODE))
tot1234 <- bind_rows(tot123, loc4)


# Death records with the attributes of the town they were matched to
st_ethn <- tot1234 %>% left_join(dcut, by = "PCODE")

# Event records with town name and majority-identity category attached
town_ids <- dd %>% select(l4_pcode, l4_en, emaj5cat)
ed <- ee %>% left_join(town_ids, by = "l4_pcode")

# Protest series for the dot charts: events starting before 2 Oct 2011 that
# have at least one of the three challenger flags set to 1.
# NOTE(review): the deaths series is cut at 2011-10-12 and the tables below
# use 2011-10-12 for protests as well -- confirm 2011-10-02 is intended here.
kbprot <- ed %>%
  filter(
    date_start < as.Date("2011-10-02"),
    ch_pcfl == 1 | ch_sviol == 1 | ch_cviol == 1
  ) %>%
  select(emaj5cat, date = date_start) %>%
  mutate(type = "Protests")

# Deaths series for the dot charts (all matched and unmatched death rows)
kbdeath <- st_ethn %>%
  select(emaj5cat, date) %>%
  mutate(type = "Deaths")

# Stacked protests + deaths, one row per plotted point
kball <- bind_rows(kbprot, kbdeath)

# Figure 2: protests and deaths over time, Kurdish- and Sunni Arab
# tribal-majority towns only. Only these two categories survive the filter,
# so only they need facet labels.
kbonly <- kball %>% filter(emaj5cat %in% c("kurdish", "badawi"))
kbonly$emaj5cat[kbonly$emaj5cat == "kurdish"] <- "Kurdish\n town majority"
kbonly$emaj5cat[kbonly$emaj5cat == "badawi"] <- "Sunni Arab tribal\n town majority"

fig2 <- ggplot(kbonly, aes(date, type)) +
  geom_jitter(height = .2, width = .2, shape = 21, color = "blue", size = .75) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b \n %Y") +
  facet_grid(rows = vars(emaj5cat)) +
  labs(x = "", y = "") #, title="protest events and challenger deaths by town majority ethnic identity, March - September 2011")

pdf("dotchart_intro_kurd_sunartr_only_9jul25.pdf", height = 3, width = 5)
# Explicit print(): ggplot objects are only drawn when printed, and top-level
# auto-printing does not happen under source(), which would leave the PDF empty.
print(fig2)
dev.off()

##################
##################
#### Figure A2 ###
##################
##################

# Figure A2: same dot chart as Figure 2, but for every town-majority category.
kbonly <- kball # %>% filter(emaj5cat %in% c("kurdish", "badawi"))
kbonly$emaj5cat[kbonly$emaj5cat == "kurdish"] <- "Kurdish\n town majority"
kbonly$emaj5cat[kbonly$emaj5cat == "badawi"] <- "Sunni Arab tribal\n town majority"
kbonly$emaj5cat[kbonly$emaj5cat == "othermin"] <- "other minorities \n town majority"
kbonly$emaj5cat[kbonly$emaj5cat == "sunnifam"] <- "Sunni Arab families \n town majority"
kbonly$emaj5cat[kbonly$emaj5cat == "alawi"] <- "Alawi"

# Rows without a category (e.g. deaths not matched to a town) are dropped
figA2 <- ggplot(kbonly %>% filter(!is.na(emaj5cat)), aes(date, type)) +
  geom_jitter(height = .2, width = .2, shape = 21, color = "blue") +
  scale_x_date(date_breaks = "1 month", date_labels = "%b \n %Y") +
  facet_grid(rows = vars(emaj5cat)) +
  labs(x = "", y = "") #, title="protest events and challenger deaths by town majority ethnic identity, March - September 2011")

pdf("dotchart_appendix_allethn_9jul25.pdf", height = 7, width = 8)
# Explicit print(): ggplot objects are only drawn when printed, and top-level
# auto-printing does not happen under source(), which would leave the PDF empty.
print(figA2)
dev.off()

##################
##################
#### TABLE 2 ####
##################
##################

# Table 2: for each challenger protest event, is at least one death recorded
# in the same locality within the 14 days after the event starts?

# Protest events starting before 12 Oct 2011 with any challenger flag set,
# each with its [date, date + 14] window.
prot_dateloc <- ed %>%
  rename(date = date_start) %>%
  filter(date < as.Date("2011-10-12")) %>%
  mutate(anychall = ifelse(ch_pcfl == 1 | ch_sviol == 1 | ch_cviol == 1, 1, 0)) %>%
  filter(anychall == 1) %>%
  rename(PCODE = l4_pcode) %>%
  select(PCODE, date) %>%
  mutate(end_date = date + 14, window_2wk = interval(date, end_date))

# Number of same-locality death records inside each event's window.
# seq_len() keeps this correct for tables with zero or one row.
deaths_in_window <- vapply(
  seq_len(nrow(prot_dateloc)),
  function(i) {
    ll <- st_ethn %>% filter(PCODE == prot_dateloc$PCODE[i]) %>% pull(date)
    sum(ll %within% prot_dateloc$window_2wk[i])
  },
  integer(1)
)
prot_dateloc$death_in_2wk_window_st <- ifelse(deaths_in_window > 0, "yes", "no")

# Percentage of events followed by a death, within each town-majority category.
# Categories with no "yes" rows drop out here and show as NA after the join.
pcts <- prot_dateloc %>%
  left_join(dcut, by = "PCODE") %>%
  group_by(emaj5cat) %>%
  count(death_in_2wk_window_st) %>%
  mutate(pct = round(100 * n / sum(n), 0)) %>%
  filter(death_in_2wk_window_st == "yes") %>%
  ungroup() %>%
  select(-death_in_2wk_window_st, -n)

# Total number of protest events per category
tot <- prot_dateloc %>%
  left_join(dcut, by = "PCODE") %>%
  count(emaj5cat) %>%
  rename(tot_prot_count = n)

l14 <- tot %>% left_join(pcts %>% rename(l14 = pct), by = "emaj5cat")
# Explicit print() so the table is also shown when run through source()
print(l14)
#14day=62pct
# prot_dateloc %>% left_join(dcut, by="PCODE") %>% 
#   count(death_in_2wk_window_st) %>% mutate(pct=round(100*n/sum(n),0)) 


###################################
###################################
# APPENDIX TABLE A2################
###################################
###################################

# Appendix Table A2: the Table 2 percentages recomputed with alternative
# post-protest windows (7, 10 and 28 days) next to the 14-day baseline that
# is already stored in `l14`.

# Percentage of challenger protest events, by town-majority category, that
# are followed by at least one death record in the same locality within
# `n_days` days of the event start (window end points included).
# Returns a tibble with columns `emaj5cat` and `pct`; categories with no such
# event are absent from the result.
death_share_by_window <- function(n_days) {
  prot_win <- ed %>%
    rename(date = date_start) %>%
    filter(date < as.Date("2011-10-12")) %>%
    mutate(anychall = ifelse(ch_pcfl == 1 | ch_sviol == 1 | ch_cviol == 1, 1, 0)) %>%
    filter(anychall == 1) %>%
    rename(PCODE = l4_pcode) %>%
    select(PCODE, date) %>%
    mutate(end_date = date + n_days, window = interval(date, end_date))

  # Same-locality death records inside each event's window; seq_len() keeps
  # this correct for tables with zero or one row.
  deaths_in_window <- vapply(
    seq_len(nrow(prot_win)),
    function(i) {
      ll <- st_ethn %>% filter(PCODE == prot_win$PCODE[i]) %>% pull(date)
      sum(ll %within% prot_win$window[i])
    },
    integer(1)
  )
  prot_win$death_in_window <- ifelse(deaths_in_window > 0, "yes", "no")

  prot_win %>%
    left_join(dcut, by = "PCODE") %>%
    group_by(emaj5cat) %>%
    count(death_in_window) %>%
    mutate(pct = round(100 * n / sum(n), 0)) %>%
    filter(death_in_window == "yes") %>%
    ungroup() %>%
    select(emaj5cat, pct)
}

# Overall (all categories pooled) shares noted by the authors:
# 7day=56pct, 10day=59pct, 28day=69pct. To recompute a pooled share, count the
# yes/no flag over all events without grouping by emaj5cat.

#ALT LAG: 1 week
l14 <- l14 %>%
  left_join(death_share_by_window(7) %>% rename(l7 = pct), by = "emaj5cat")

#ALT LAG: 10 days
l14 <- l14 %>%
  left_join(death_share_by_window(10) %>% rename(l10 = pct), by = "emaj5cat")

#ALT LAG: 4 weeks
#APPENDIX TABLE VARYING WINDOW
l14 <- l14 %>%
  left_join(death_share_by_window(28) %>% rename(l28 = pct), by = "emaj5cat")

mm <- xtable(l14 %>% select(emaj5cat, tot_prot_count, l7, l10, l14, l28), digits = 0)
print(mm, include.rownames = FALSE)

#tot events (791) and window overall avgs 7:56, 10:59, 14:62, 28: 69

